load libraries

In [1]:
import os
import cv2
import glob
import numpy as np
import pandas as pd

from keras.models import *
from keras.optimizers import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *
Using TensorFlow backend.
In [12]:
dir = "/ext/Data/distracted_driver_detection/"

model_image_size = (320, 480)
fine_tune_layer = 172
final_layer = 314
visual_layer = 311
batch_size = 128

加载数据集

load train data

In [3]:
train_gen = ImageDataGenerator(
    featurewise_std_normalization=True,
    samplewise_std_normalization=False,
    rotation_range=10.,
    width_shift_range=0.05,
    height_shift_range=0.05,
    shear_range=0.1,
    zoom_range=0.1,
)
gen = ImageDataGenerator(
    featurewise_std_normalization=True,
    samplewise_std_normalization=False,
)

train_generator = train_gen.flow_from_directory(os.path.join(dir, 'train'),  model_image_size, shuffle=True, batch_size=batch_size, class_mode="categorical")
print("subdior to train type {}".format(train_generator.class_indices))
valid_generator = gen.flow_from_directory(os.path.join(dir, 'valid'),  model_image_size, shuffle=True, batch_size=batch_size, class_mode="categorical")
print("subdior to valid type {}".format(valid_generator.class_indices))
Found 22424 images belonging to 10 classes.
subdior to train type {'c0': 0, 'c1': 1, 'c2': 2, 'c3': 3, 'c4': 4, 'c5': 5, 'c6': 6, 'c7': 7, 'c8': 8, 'c9': 9}
Found 641 images belonging to 10 classes.
subdior to valid type {'c0': 0, 'c1': 1, 'c2': 2, 'c3': 3, 'c4': 4, 'c5': 5, 'c6': 6, 'c7': 7, 'c8': 8, 'c9': 9}
In [4]:
input_tensor = Input((*model_image_size, 3))
x = input_tensor
x = Lambda(inception_v3.preprocess_input)(x)

base_model = InceptionV3(input_tensor=x, weights='imagenet', include_top=False)

x = GlobalAveragePooling2D()(base_model.output)
x = Dropout(0.5)(x)
x = Dense(10, activation='softmax')(x)
model = Model(base_model.input, x)

print("total layer count {}".format(len(base_model.layers)))

for i in range(fine_tune_layer):
    model.layers[i].trainable = False
total layer count 312

训练模型

In [5]:
print("train_generator.samples = {}".format(train_generator.samples))
print("valid_generator.samples = {}".format(valid_generator.samples))
steps_train_sample = train_generator.samples // 128 + 1
steps_valid_sample = valid_generator.samples // 128 + 1
train_generator.samples = 22424
valid_generator.samples = 641
In [6]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(train_generator, steps_per_epoch=steps_train_sample, epochs=4, validation_data=valid_generator, validation_steps=steps_valid_sample)

model.save("models/inceptionV3-imagenet-finetune{}-adam.h5".format(fine_tune_layer))
print("model saved!")
Epoch 1/4
/home/jidou/anaconda3/lib/python3.6/site-packages/keras/preprocessing/image.py:514: UserWarning: This ImageDataGenerator specifies `featurewise_std_normalization`, but it hasn'tbeen fit on any training data. Fit it first by calling `.fit(numpy_data)`.
  warnings.warn('This ImageDataGenerator specifies '
176/176 [==============================] - 399s - loss: 0.1750 - acc: 0.9450 - val_loss: 0.6448 - val_acc: 0.8721
Epoch 2/4
176/176 [==============================] - 388s - loss: 0.0385 - acc: 0.9894 - val_loss: 1.0044 - val_acc: 0.8315
Epoch 3/4
176/176 [==============================] - 390s - loss: 0.0179 - acc: 0.9953 - val_loss: 0.3611 - val_acc: 0.9126
Epoch 4/4
176/176 [==============================] - 389s - loss: 0.0205 - acc: 0.9937 - val_loss: 0.5720 - val_acc: 0.8924
model saved!
In [7]:
model.compile(optimizer=RMSprop(lr=1*0.00001), loss='categorical_crossentropy', metrics=['accuracy'])
model.fit_generator(train_generator, steps_per_epoch=steps_train_sample, epochs=6, validation_data=valid_generator, validation_steps=steps_valid_sample)

model.save("models/inceptionV3-imagenet-finetune{}.h5".format(fine_tune_layer))
print("model saved!")
Epoch 1/6
/home/jidou/anaconda3/lib/python3.6/site-packages/keras/preprocessing/image.py:514: UserWarning: This ImageDataGenerator specifies `featurewise_std_normalization`, but it hasn'tbeen fit on any training data. Fit it first by calling `.fit(numpy_data)`.
  warnings.warn('This ImageDataGenerator specifies '
176/176 [==============================] - 399s - loss: 0.0111 - acc: 0.9966 - val_loss: 0.3748 - val_acc: 0.9204
Epoch 2/6
176/176 [==============================] - 389s - loss: 0.0057 - acc: 0.9986 - val_loss: 0.2672 - val_acc: 0.9314
Epoch 3/6
176/176 [==============================] - 390s - loss: 0.0039 - acc: 0.9989 - val_loss: 0.2979 - val_acc: 0.9314
Epoch 4/6
176/176 [==============================] - 390s - loss: 0.0029 - acc: 0.9991 - val_loss: 0.3104 - val_acc: 0.9314
Epoch 5/6
176/176 [==============================] - 393s - loss: 0.0016 - acc: 0.9997 - val_loss: 0.3149 - val_acc: 0.9329
Epoch 6/6
176/176 [==============================] - 390s - loss: 0.0016 - acc: 0.9995 - val_loss: 0.3046 - val_acc: 0.9376
model saved!
In [8]:
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.models import *

model = load_model("models/inceptionV3-imagenet-finetune{}.h5".format(fine_tune_layer))
print("load successed")

#SVG(model_to_dot(model).create(prog='dot', format='svg'))
load successed

CAM 可视化

http://cnnlocalization.csail.mit.edu/

$cam = (P-0.5)*w*output$

  • cam: 类激活图 X*X
  • P: 概率
  • output: 卷积层的输出 2048*1
  • w: 卷积核的权重 X*X*2048
In [9]:
z = zip([x.name for x in model.layers], range(len(model.layers)))
for k, v in z:
    print("{} - {}".format(k,v))
input_1 - 0
lambda_1 - 1
conv2d_1 - 2
batch_normalization_1 - 3
activation_1 - 4
conv2d_2 - 5
batch_normalization_2 - 6
activation_2 - 7
conv2d_3 - 8
batch_normalization_3 - 9
activation_3 - 10
max_pooling2d_1 - 11
conv2d_4 - 12
batch_normalization_4 - 13
activation_4 - 14
conv2d_5 - 15
batch_normalization_5 - 16
activation_5 - 17
max_pooling2d_2 - 18
conv2d_9 - 19
batch_normalization_9 - 20
activation_9 - 21
conv2d_7 - 22
conv2d_10 - 23
batch_normalization_7 - 24
batch_normalization_10 - 25
activation_7 - 26
activation_10 - 27
average_pooling2d_1 - 28
conv2d_6 - 29
conv2d_8 - 30
conv2d_11 - 31
conv2d_12 - 32
batch_normalization_6 - 33
batch_normalization_8 - 34
batch_normalization_11 - 35
batch_normalization_12 - 36
activation_6 - 37
activation_8 - 38
activation_11 - 39
activation_12 - 40
mixed0 - 41
conv2d_16 - 42
batch_normalization_16 - 43
activation_16 - 44
conv2d_14 - 45
conv2d_17 - 46
batch_normalization_14 - 47
batch_normalization_17 - 48
activation_14 - 49
activation_17 - 50
average_pooling2d_2 - 51
conv2d_13 - 52
conv2d_15 - 53
conv2d_18 - 54
conv2d_19 - 55
batch_normalization_13 - 56
batch_normalization_15 - 57
batch_normalization_18 - 58
batch_normalization_19 - 59
activation_13 - 60
activation_15 - 61
activation_18 - 62
activation_19 - 63
mixed1 - 64
conv2d_23 - 65
batch_normalization_23 - 66
activation_23 - 67
conv2d_21 - 68
conv2d_24 - 69
batch_normalization_21 - 70
batch_normalization_24 - 71
activation_21 - 72
activation_24 - 73
average_pooling2d_3 - 74
conv2d_20 - 75
conv2d_22 - 76
conv2d_25 - 77
conv2d_26 - 78
batch_normalization_20 - 79
batch_normalization_22 - 80
batch_normalization_25 - 81
batch_normalization_26 - 82
activation_20 - 83
activation_22 - 84
activation_25 - 85
activation_26 - 86
mixed2 - 87
conv2d_28 - 88
batch_normalization_28 - 89
activation_28 - 90
conv2d_29 - 91
batch_normalization_29 - 92
activation_29 - 93
conv2d_27 - 94
conv2d_30 - 95
batch_normalization_27 - 96
batch_normalization_30 - 97
activation_27 - 98
activation_30 - 99
max_pooling2d_3 - 100
mixed3 - 101
conv2d_35 - 102
batch_normalization_35 - 103
activation_35 - 104
conv2d_36 - 105
batch_normalization_36 - 106
activation_36 - 107
conv2d_32 - 108
conv2d_37 - 109
batch_normalization_32 - 110
batch_normalization_37 - 111
activation_32 - 112
activation_37 - 113
conv2d_33 - 114
conv2d_38 - 115
batch_normalization_33 - 116
batch_normalization_38 - 117
activation_33 - 118
activation_38 - 119
average_pooling2d_4 - 120
conv2d_31 - 121
conv2d_34 - 122
conv2d_39 - 123
conv2d_40 - 124
batch_normalization_31 - 125
batch_normalization_34 - 126
batch_normalization_39 - 127
batch_normalization_40 - 128
activation_31 - 129
activation_34 - 130
activation_39 - 131
activation_40 - 132
mixed4 - 133
conv2d_45 - 134
batch_normalization_45 - 135
activation_45 - 136
conv2d_46 - 137
batch_normalization_46 - 138
activation_46 - 139
conv2d_42 - 140
conv2d_47 - 141
batch_normalization_42 - 142
batch_normalization_47 - 143
activation_42 - 144
activation_47 - 145
conv2d_43 - 146
conv2d_48 - 147
batch_normalization_43 - 148
batch_normalization_48 - 149
activation_43 - 150
activation_48 - 151
average_pooling2d_5 - 152
conv2d_41 - 153
conv2d_44 - 154
conv2d_49 - 155
conv2d_50 - 156
batch_normalization_41 - 157
batch_normalization_44 - 158
batch_normalization_49 - 159
batch_normalization_50 - 160
activation_41 - 161
activation_44 - 162
activation_49 - 163
activation_50 - 164
mixed5 - 165
conv2d_55 - 166
batch_normalization_55 - 167
activation_55 - 168
conv2d_56 - 169
batch_normalization_56 - 170
activation_56 - 171
conv2d_52 - 172
conv2d_57 - 173
batch_normalization_52 - 174
batch_normalization_57 - 175
activation_52 - 176
activation_57 - 177
conv2d_53 - 178
conv2d_58 - 179
batch_normalization_53 - 180
batch_normalization_58 - 181
activation_53 - 182
activation_58 - 183
average_pooling2d_6 - 184
conv2d_51 - 185
conv2d_54 - 186
conv2d_59 - 187
conv2d_60 - 188
batch_normalization_51 - 189
batch_normalization_54 - 190
batch_normalization_59 - 191
batch_normalization_60 - 192
activation_51 - 193
activation_54 - 194
activation_59 - 195
activation_60 - 196
mixed6 - 197
conv2d_65 - 198
batch_normalization_65 - 199
activation_65 - 200
conv2d_66 - 201
batch_normalization_66 - 202
activation_66 - 203
conv2d_62 - 204
conv2d_67 - 205
batch_normalization_62 - 206
batch_normalization_67 - 207
activation_62 - 208
activation_67 - 209
conv2d_63 - 210
conv2d_68 - 211
batch_normalization_63 - 212
batch_normalization_68 - 213
activation_63 - 214
activation_68 - 215
average_pooling2d_7 - 216
conv2d_61 - 217
conv2d_64 - 218
conv2d_69 - 219
conv2d_70 - 220
batch_normalization_61 - 221
batch_normalization_64 - 222
batch_normalization_69 - 223
batch_normalization_70 - 224
activation_61 - 225
activation_64 - 226
activation_69 - 227
activation_70 - 228
mixed7 - 229
conv2d_73 - 230
batch_normalization_73 - 231
activation_73 - 232
conv2d_74 - 233
batch_normalization_74 - 234
activation_74 - 235
conv2d_71 - 236
conv2d_75 - 237
batch_normalization_71 - 238
batch_normalization_75 - 239
activation_71 - 240
activation_75 - 241
conv2d_72 - 242
conv2d_76 - 243
batch_normalization_72 - 244
batch_normalization_76 - 245
activation_72 - 246
activation_76 - 247
max_pooling2d_4 - 248
mixed8 - 249
conv2d_81 - 250
batch_normalization_81 - 251
activation_81 - 252
conv2d_78 - 253
conv2d_82 - 254
batch_normalization_78 - 255
batch_normalization_82 - 256
activation_78 - 257
activation_82 - 258
conv2d_79 - 259
conv2d_80 - 260
conv2d_83 - 261
conv2d_84 - 262
average_pooling2d_8 - 263
conv2d_77 - 264
batch_normalization_79 - 265
batch_normalization_80 - 266
batch_normalization_83 - 267
batch_normalization_84 - 268
conv2d_85 - 269
batch_normalization_77 - 270
activation_79 - 271
activation_80 - 272
activation_83 - 273
activation_84 - 274
batch_normalization_85 - 275
activation_77 - 276
mixed9_0 - 277
concatenate_1 - 278
activation_85 - 279
mixed9 - 280
conv2d_90 - 281
batch_normalization_90 - 282
activation_90 - 283
conv2d_87 - 284
conv2d_91 - 285
batch_normalization_87 - 286
batch_normalization_91 - 287
activation_87 - 288
activation_91 - 289
conv2d_88 - 290
conv2d_89 - 291
conv2d_92 - 292
conv2d_93 - 293
average_pooling2d_9 - 294
conv2d_86 - 295
batch_normalization_88 - 296
batch_normalization_89 - 297
batch_normalization_92 - 298
batch_normalization_93 - 299
conv2d_94 - 300
batch_normalization_86 - 301
activation_88 - 302
activation_89 - 303
activation_92 - 304
activation_93 - 305
batch_normalization_94 - 306
activation_86 - 307
mixed9_1 - 308
concatenate_2 - 309
activation_94 - 310
mixed10 - 311
global_average_pooling2d_1 - 312
dropout_1 - 313
dense_1 - 314
In [10]:
import matplotlib.pyplot as plt
import random
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

def show_heatmap_image(model_show, weights_show):
    test_dir = os.path.join(dir,  "test", "test" )
    image_files = glob.glob(os.path.join(test_dir,"*"))
    print(len(image_files))
    
    plt.figure(figsize=(12, 24))
    for i in range(10):
        plt.subplot(5, 2, i+1)
        img = cv2.imread(image_files[2000*i+113])
        img = cv2.resize(img,  (model_image_size[1],model_image_size[0]))
        x = img.copy()
        x.astype(np.float32)
        out, predictions = model_show.predict(np.expand_dims(x, axis=0))
        predictions = predictions[0]
        out = out[0]
        
        max_idx = np.argmax(predictions)
        prediction = predictions[max_idx]

        status = ["safe driving",  " texting - right",  "phone - right",  "texting - left",  "phone - left",  
                  "operation radio", "drinking", "reaching behind", "hair and makeup", "talking"]

        plt.title('c%d |%s| %.2f%%' % (max_idx , status[max_idx], prediction*100))
    
        cam = (prediction - 0.5) * np.matmul(out, weights_show)
        cam = cam[:,:,max_idx]
        cam -= cam.min()
        cam /= cam.max()
        cam -= 0.2
        cam /= 0.8

        cam = cv2.resize(cam, (model_image_size[1],model_image_size[0]))
        heatmap = cv2.applyColorMap(np.uint8(255*cam), cv2.COLORMAP_JET)
        heatmap[np.where(cam <= 0.2)] = 0

        out = cv2.addWeighted(img, 0.8, heatmap, 0.4, 0)

        plt.axis('off')
        plt.imshow(out[:,:,::-1])
print("done")
done
In [13]:
weights = model.layers[final_layer].get_weights()[0]
layer_output = model.layers[visual_layer].output
model2 = Model(model.input, [layer_output, model.output])
print("layer_output {0}".format(layer_output))
print("weights shape {0}".format(weights.shape))
show_heatmap_image(model2, weights)
layer_output Tensor("mixed10_1/concat:0", shape=(?, 8, 13, 2048), dtype=float32)
weights shape (2048, 10)
79726
In [14]:
def gen_kaggle_csv(model,  model_image_size, csv_name):
    dir = "/ext/Data/distracted_driver_detection/"

    gen = ImageDataGenerator()
    test_generator = gen.flow_from_directory(dir + "test/",  model_image_size, shuffle=False, 
                                             batch_size=batch_size, class_mode=None)
#     s = test_generator.__dict__
#     del s['filenames']
#     print(s)
    y_pred = model.predict_generator(test_generator,  steps=test_generator.samples//batch_size+1,  verbose=1)
    print("y_pred shape {}".format(y_pred.shape))
    y_pred = y_pred.clip(min=0.005, max=0.995)
    print(y_pred[:3])

    l = list()
    for i, fname in enumerate(test_generator.filenames):
        name = fname[fname.rfind('/')+1:]
        l.append( [name, *y_pred[i]] )

    l = np.array(l)
    data = {'img': l[:,0]}
    for i in range(10):
        data["c%d"%i] = l[:,i+1]
    df = pd.DataFrame(data, columns=['img'] + ['c%d'%i for i in range(10)])
    df.head(10)
    df = df.sort_values(by='img')
    df.to_csv(csv_name, index=None, float_format='%.3f')
    print("csv saved")

print("done")
done
In [15]:
gen_kaggle_csv(model,  model_image_size, 'csv/InceptionV3-imagenet-finetune{}-pred.csv'.format(fine_tune_layer))
Found 79726 images belonging to 1 classes.
623/623 [==============================] - 685s      
y_pred shape (79726, 10)
[[ 0.005  0.005  0.005  0.005  0.005  0.995  0.005  0.005  0.005  0.005]
 [ 0.005  0.005  0.005  0.005  0.995  0.005  0.005  0.005  0.005  0.005]
 [ 0.005  0.005  0.005  0.005  0.995  0.005  0.005  0.005  0.005  0.005]]
csv saved
In [ ]: